#!/usr/bin/env python
# -*- coding:utf-8 -*-

from article_spider.utils.tools import Tools
from article_spider.settings import AJAX_DOMAINS
from tld import get_tld
import tldextract
import re

# Feed a sample article tail (with "source" credits) through
# Tools.content_filter and print whatever it returns.
raw_content = '装甲版则是 429 美元（2940 元）。（来源：cnBeta）头图来源：视觉中国 员工可能卷入贩毒事件。（来源：凤凰科技）'
content = Tools.content_filter(raw_content)
print(content)

# Test AJAX_DOMAINS membership twice: once with the full URL, once with the
# "<domain>.<suffix>" string rebuilt from tldextract's result.
url = 'https://www.toutiao.com/group/6591421497967903240/'
full_url_listed = url in AJAX_DOMAINS
print(full_url_listed)

print(get_tld(url))
extracted = tldextract.extract(url)
domain = '%s.%s' % (extracted.domain, extracted.suffix)
print(domain)
domain_listed = domain in AJAX_DOMAINS
print(domain_listed)
# URL template: first placeholder is the page number, second is the type.
start_urls = [
    'https://www.bufanbiz.com/api/website/articles/?p={}&n=20&type={}',
]
# The empty string is a valid type value and yields "type=" in the URL.
allowed_types = [
    '', '新消费', '金融科技', '出行物流', '文娱', '人工智能', '人物', '干货', '其他'
]

# Expand the template for pages 1..9 crossed with every type, page-major,
# then print the URLs in that order.
url_template = start_urls[0]
page_urls = [
    url_template.format(page, article_type)
    for page in range(1, 10)
    for article_type in allowed_types
]
for url in page_urls:
    print(url)

# Strip every "<...>" tag from a sample HTML page so only the text payload
# (here the JSON inside <pre>) is left, then print it.
html_text = '<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">{"a": 10}</pre></body></html>'
# The pattern contains no ".", so re.S does not affect matching; it is kept
# to mirror the original call.
dd = re.sub(r'<[^>]+>', '', html_text, flags=re.S)
print(dd)

# Substring test: does the element's class attribute mention "js-disabled"?
class_attr = 'zbtn znxt js-n-1536131599561 js-disabled'
is_disabled = 'js-disabled' in class_attr
print(is_disabled)


# Sample string value. Deliberately not named `str`: rebinding that name at
# module level would shadow the builtin and make any later `str(...)` call
# raise TypeError.
sample_str = 'fdsdfsd'
